library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# Read the shootings CSV from its location on the local machine.
shootings_csv <- "C:\\Users\\apoor_b31k2hq\\OneDrive\\Desktop\\Alekya\\ait 580\\shootings.csv"
data <- read.csv(file = shootings_csv)
# Print the first rows as a quick check of the parsed columns.
head(x = data)
## X INCIDENT_KEY OCCUR_DATE OCCUR_TIME BORO LOC_OF_OCCUR_DESC PRECINCT
## 1 0 228798151 05/27/2021 21:30:00 QUEENS UNKNOWN 105
## 2 1 137471050 06/27/2014 17:40:00 BRONX UNKNOWN 40
## 3 2 147998800 11/21/2015 03:56:00 QUEENS UNKNOWN 108
## 4 3 146837977 10/09/2015 18:30:00 BRONX UNKNOWN 44
## 5 4 58921844 02/19/2009 22:58:00 BRONX UNKNOWN 47
## 6 5 219559682 10/21/2020 21:36:00 BROOKLYN UNKNOWN 81
## JURISDICTION_CODE LOC_CLASSFCTN_DESC LOCATION_DESC STATISTICAL_MURDER_FLAG
## 1 0.0 UNKNOWN UNKNOWN False
## 2 0.0 UNKNOWN UNKNOWN False
## 3 0.0 UNKNOWN UNKNOWN True
## 4 0.0 UNKNOWN UNKNOWN False
## 5 0.0 UNKNOWN UNKNOWN True
## 6 0.0 UNKNOWN UNKNOWN True
## PERP_AGE_GROUP PERP_SEX PERP_RACE VIC_AGE_GROUP VIC_SEX VIC_RACE
## 1 UNKNOWN UNKNOWN UNKNOWN 18-24 M BLACK
## 2 UNKNOWN UNKNOWN UNKNOWN 18-24 M BLACK
## 3 UNKNOWN UNKNOWN UNKNOWN 25-44 M WHITE
## 4 UNKNOWN UNKNOWN UNKNOWN <18 M WHITE HISPANIC
## 5 25-44 M BLACK 45-64 M BLACK
## 6 UNKNOWN UNKNOWN UNKNOWN 25-44 M BLACK
## X_COORD_CD Y_COORD_CD Latitude Longitude
## 1 1058925 180924.0 40.662964620000025 -73.73083868899994
## 2 1005028 234516.0 40.81035186300005 -73.92494232599995
## 3 1007668 209836.5 40.74260663300004 -73.91549174199997
## 4 1006537 244511.1 40.83778200300002 -73.91945661499993
## 5 1024922 262189.4 40.88623791800006 -73.85290950899997
## 6 1004234 186461.7 40.678456718000064 -73.92795224099996
## Lon_Lat
## 1 POINT (-73.73083868899994 40.662964620000025)
## 2 POINT (-73.92494232599995 40.81035186300006)
## 3 POINT (-73.91549174199997 40.74260663300004)
## 4 POINT (-73.91945661499994 40.83778200300003)
## 5 POINT (-73.85290950899997 40.88623791800006)
## 6 POINT (-73.92795224099996 40.678456718000064)
# Univariate Analysis ----
# Convert the homicide flag to a factor so it is handled as a discrete
# variable on the x axis and in the fill scale.
data$STATISTICAL_MURDER_FLAG <- factor(data$STATISTICAL_MURDER_FLAG)
# Bar plot for the distribution of homicide cases, with the number of rows
# printed in bold above each bar.
ggplot(data, aes(x = STATISTICAL_MURDER_FLAG, fill = STATISTICAL_MURDER_FLAG)) +
  geom_bar() +
  # after_stat(count) replaces the `..count..` notation, which was
  # deprecated in ggplot2 3.4.0.
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    vjust = -0.5,
    fontface = "bold"
  ) +
  labs(
    title = "Distribution of Homicide Cases",
    x = "Statistical Murder Flag",
    y = "Count"
  ) +
  scale_fill_manual(
    values = c("False" = "#FF9999", "True" = "#3399FF"),
    name = "Homicide Flag"
  ) +
  theme_minimal()
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Count the incidents recorded for each borough and reshape the counts into
# a two-column data frame (BORO, Count) for plotting.
boro_counts <- table(data$BORO)
boro_df <- as.data.frame(boro_counts)
names(boro_df) <- c("BORO", "Count")
# Define a custom color palette, keyed by borough name
my_colors <- c(
  "BRONX" = "#99FFFF",
  "BROOKLYN" = "#FFCCCC",
  "MANHATTAN" = "#66CCCC",
  "QUEENS" = "#FFFF99",
  "STATEN ISLAND" = "#CCCCFF"
)
# Plotting the pie chart: a single stacked bar wrapped into polar
# coordinates, with each slice labelled by its percentage of all incidents.
ggplot(boro_df, aes(x = "", y = Count, fill = BORO)) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) +
  theme_void() +
  labs(title = "Frequency of Incidents by Borough") +
  theme(legend.title = element_blank()) +
  scale_fill_manual(values = my_colors) +
  # fontface is a constant, so it is set as a fixed parameter rather than
  # mapped inside aes(); position_stack(vjust = 0.5) centres each label in
  # its slice.
  geom_text(
    aes(label = paste0(round(Count / sum(Count) * 100, 1), "%")),
    fontface = "bold",
    position = position_stack(vjust = 0.5)
  )
# Prepare the data for plotting: cross-tabulate borough against the homicide
# flag and flatten the table into one row per (borough, flag) pair.
# as.data.frame() on a table returns the classifying columns as factors, so
# no further conversion of STATISTICAL_MURDER_FLAG is needed.
data_count <- as.data.frame(table(data$BORO, data$STATISTICAL_MURDER_FLAG))
names(data_count) <- c("BORO", "STATISTICAL_MURDER_FLAG", "Count")
# Define custom colors
color_map <- c("False" = "#2ca02c", "True" = "#ff7f0e")
# Create a grouped bar plot with the count printed above each bar. The bars
# and their labels use the same dodge width so the labels stay aligned.
ggplot(data_count, aes(x = BORO, y = Count, fill = STATISTICAL_MURDER_FLAG)) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_text(
    aes(label = Count),
    vjust = -0.3,
    fontface = "bold",
    position = position_dodge(width = 0.9)
  ) +
  # The legend title is set here only; a second, different title in labs()
  # would be overridden by this scale name.
  scale_fill_manual(values = color_map, name = "Homicide Flag") +
  labs(title = "Location vs. Homicide Cases", x = "Borough", y = "Count") +
  theme_minimal()
# Age brackets to display, in plotting order. Values outside this set
# become NA in factor() and are therefore not counted by table().
age_groups <- c("<18", "18-24", "25-44", "45-64", "65+")
# Tally perpetrators and victims per age bracket
perp_age_counts <- table(factor(data$PERP_AGE_GROUP, levels = age_groups))
vic_age_counts <- table(factor(data$VIC_AGE_GROUP, levels = age_groups))
# Build the grouped bar chart with Plotly, one trace per role
fig <- plot_ly()
fig <- add_trace(
  fig,
  x = names(perp_age_counts),
  y = as.numeric(perp_age_counts),
  type = "bar",
  name = "Perpetrator",
  marker = list(color = "#99CC99")
)
fig <- add_trace(
  fig,
  x = names(vic_age_counts),
  y = as.numeric(vic_age_counts),
  type = "bar",
  name = "Victim",
  marker = list(color = "#9999FF")
)
fig <- layout(
  fig,
  title = "Distribution of Perpetrator and Victim Age Groups",
  xaxis = list(title = "Age Group"),
  yaxis = list(title = "Count"),
  barmode = "group"
)
# Display the chart
fig
# Perpetrator race chart
bar_color_perp <- "palevioletred"
# Count incidents per perpetrator race, leaving out rows recorded as
# "UNKNOWN", with the most frequent race first. The factor levels are set to
# that sorted order (presumably so the bars are drawn in the same order).
perp_race_counts <- data %>%
  filter(PERP_RACE != "UNKNOWN") %>%
  count(PERP_RACE, sort = TRUE) %>%
  mutate(PERP_RACE = factor(PERP_RACE, levels = PERP_RACE))
fig_perp <- plot_ly(
  data = perp_race_counts,
  x = ~PERP_RACE,
  y = ~n,
  type = "bar",
  name = "Perpetrator Race",
  marker = list(color = bar_color_perp),
  hoverinfo = "y"
) %>%
  layout(
    title = "Distribution of Perpetrator Race",
    xaxis = list(title = "Race"),
    yaxis = list(title = "Count")
  )
# Display the perpetrator race chart
fig_perp
# Victim race chart
bar_color_vic <- "tomato"
# Count incidents per victim race, leaving out rows recorded as "UNKNOWN",
# with the most frequent race first. The factor levels are set to that
# sorted order (presumably so the bars are drawn in the same order).
vic_race_counts <- data %>%
  filter(VIC_RACE != "UNKNOWN") %>%
  count(VIC_RACE, sort = TRUE) %>%
  mutate(VIC_RACE = factor(VIC_RACE, levels = VIC_RACE))
fig_vic <- plot_ly(
  data = vic_race_counts,
  x = ~VIC_RACE,
  y = ~n,
  type = "bar",
  name = "Victim Race",
  marker = list(color = bar_color_vic),
  hoverinfo = "y"
) %>%
  layout(
    title = "Distribution of Victim Race",
    xaxis = list(title = "Race"),
    yaxis = list(title = "Count")
  )
# Display the victim race chart
fig_vic